#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <libaio.h>
#include <libgen.h>
#include <pciaccess.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/types.h>

#define DBG_SUBSYS S_LIBREPLICA

#include "core.h"
#include "dbg.h"
#include "disk.h"
#include "nodectl.h"
#include "configure.h"

#include "nvme.h"
#include "nvme_info.h"
#include "nvme_internal.h"
#include "nvme_pci.h"


#undef LIST_HEAD
#include "list.h"

/* Completion sentinel stored in nvme_complete_status.status. */
#define NVME_IO_COMPLETED 0x01

/* Per-disk controller/namespace/qpair handle stored in disk_t.disk_fd.
 * NOTE(review): "managemnt" is a typo, kept because it is part of the
 * module's interface. */
struct nvme_managemnt {
        struct nvme_ctrlr *ctrlr;
        struct nvme_ns *ns;
        struct nvme_qpair *qpair;
};

/* Completion context shared between a submitter and nvme_qpair_complete(). */
struct nvme_complete_status {
        int aio_flag;   /* non-zero: wake the waiting task via schedule_resume() */
        int status;     /* set to NVME_IO_COMPLETED when the command finishes */
        int error;      /* non-zero when the completion carried an error status */
        task_t task;    /* task to resume on the aio path */
};

/* Links one qpair into the per-thread polling list (g_qpairs_list). */
struct qpair_entry {
        struct list_head hook;
        struct nvme_qpair *qpair;
};

/* Thread-local qpair table indexed by disk id; entries are also linked
 * into g_qpairs_list so nvme_qpairs_poll() can walk them. */
static __thread struct qpair_entry g_qpairs[DISK_MAX];

static __thread struct list_head g_qpairs_list;
/* One-shot guard (sic: "registerd") for per-thread poller registration. */
static __thread int poller_registerd = 0;

/*nvme io via libnvme*/
/* Initialize the calling thread's qpair polling list. Must run once per
 * worker thread before any qpair is added via nvme_add_ioqpair(). */
void libnvme_init(void)
{ 
        INIT_LIST_HEAD(&g_qpairs_list);
}

static inline int seg_count(const buffer_t *buf)
{
        int count = 0;
        struct list_head *pos;

        list_for_each(pos, &buf->list) { 
                count++; 
        }

        return count;
}

static IO_FUNC struct nvme_qpair *nvme_get_ioqpair(int disk_id)
{
        return g_qpairs[disk_id].qpair;
}

/*
 * Format a little-endian byte array as an unsigned decimal string.
 *
 * buf   - destination (caller guarantees enough room for up to 20 digits + NUL)
 * array - little-endian bytes, array[0] is the least significant
 * len   - number of bytes to consume (clamped to 8)
 */
static void sprint_uint_var_dec(char *buf, uint8_t *array, unsigned int len)
{
	uint64_t result = 0;
	unsigned int i;

	/* BUG FIX: clamp to the accumulator width — shifting a uint64_t by
	 * 64 or more bits is undefined behavior for len > 8. */
	if (len > sizeof(result))
		len = sizeof(result);

	for (i = 0; i < len; i++)
		result |= (uint64_t)array[i] << (8 * i);

	/* BUG FIX: "%lu" assumes uint64_t == unsigned long, which is false on
	 * LLP64 targets; cast and use %llu for portability. */
	sprintf(buf, "%llu", (unsigned long long)result);
}

/*
 * Per-core poller: drains completions on every qpair registered on this
 * thread, and (at a low sampled rate) dumps SMART/health statistics to
 * nodectl.
 */
void IO_FUNC nvme_qpairs_poll(void *_core, void * ud)
{
        struct list_head *pos;
        struct qpair_entry *entry;
        core_t *core = _core;

        (void)ud;

        if(unlikely(!srv_running))
                return;

        if(unlikely(gloconf.nvme_stat && (core->counter ++ % 99988531) == 0)) {     /*note, must be a prime number*/

                list_for_each(pos, &g_qpairs_list) {     //todo. replace tls with ud. tls is draggy..
                        entry = (void *)pos;
                        struct spdk_nvme_health_information_page health_page;
                        struct spdk_nvme_intel_smart_information_page smart_page;
                        char key[128] = {0};
                        char data[128] = {0};
                        int i, j, ret = 0;
                        uint16_t lvl_min, lvl_max, lvl_avg;
                        struct nvme_ctrlr *ctrlr = entry->qpair->ctrlr;

                        pthread_mutex_lock(&ctrlr->lock);

                        ret = get_health_log_page(entry->qpair->ctrlr, &health_page);
                        if (ret == 0)
                                ret = get_intel_smart_log_page(ctrlr, &smart_page);

                        /* BUG FIX: the original jumped to "out" on failure
                         * without releasing ctrlr->lock, leaving the mutex
                         * locked forever. Unlock unconditionally here. */
                        pthread_mutex_unlock(&ctrlr->lock);
                        if (ret != 0)
                                goto out;

                        /* nodectl keys use '.' separators, so rewrite the ':'
                         * characters of the PCI slot string. */
                        sprintf(key, "/nvme/%s/temperature",  ctrlr->slot);
                        for(j = 0; key[j]; j++)
                                if(key[j] == ':')
                                        key[j] = '.';
                        sprintf(data, "%d",  (int)health_page.temperature - 273);   /* Kelvin -> Celsius */
                        nodectl_set(key, data); 
                        DINFO("dump nvme infor, key:%s\r\n", key);   

                        for (i = 0; i < SPDK_COUNTOF(smart_page.attributes); i++) {
                                if (smart_page.attributes[i].code == NVME_INTEL_SMART_MEDIA_WEAR) {
                                        sprintf(key, "/nvme/%s/wear_level",  ctrlr->slot);
                                        /* BUG FIX: use a dedicated index 'j';
                                         * the original reused 'i' here, which
                                         * clobbered the attribute-loop counter
                                         * and skipped remaining attributes. */
                                        for(j = 0; key[j]; j++)
                                                if(key[j] == ':')
                                                        key[j] = '.';

                                        sprint_uint_var_dec(data, smart_page.attributes[i].raw_value, 6);
                                        nodectl_set(key, data); 
                                        DINFO("dump nvme infor, key:%s\r\n", key);    
                                }

                                if (smart_page.attributes[i].code == NVME_INTEL_SMART_WEAR_LEVELING_COUNT) {
                                        sprintf(key, "/nvme/%s/wear_level_count",  ctrlr->slot);
                                        for(j = 0; key[j]; j++)
                                                if(key[j] == ':')
                                                        key[j] = '.';

                                        /* BUG FIX: copy the 16-bit fields with
                                         * memcpy; raw_value bytes are not
                                         * guaranteed to be aligned for a
                                         * direct uint16_t load (UB on strict
                                         * alignment targets). */
                                        memcpy(&lvl_min, &smart_page.attributes[i].raw_value[2], sizeof(lvl_min));
                                        memcpy(&lvl_max, &smart_page.attributes[i].raw_value[4], sizeof(lvl_max));
                                        memcpy(&lvl_avg, &smart_page.attributes[i].raw_value[6], sizeof(lvl_avg));
                                        sprintf(data, "%d %d %d", lvl_min, lvl_max, lvl_avg);
                                        nodectl_set(key, data); 
                                        DINFO("dump nvme infor, key:%s\r\n", key);
                                }

                        }

                        DINFO("dump nvme infor finish\r\n");                        
out:
                        if(ret)
                                DERROR("get nvme information error.\r\n");
                }
        }

        /* Fast path: drain up to 64 completions per registered qpair. */
        list_for_each(pos, &g_qpairs_list)      //todo. replace tls with ud. tls is draggy..
        {
                entry = (void *)pos;
                if (likely(entry->qpair))
                        nvme_ioqp_poll(entry->qpair, 64);
        }
}

/*
 * core_request() trampoline: runs on the target core and binds a fresh I/O
 * qpair for disk_id into that thread's TLS table and polling list.
 * Registers the per-core completion poller on first use.
 * Returns 0 on success, ENODEV if no qpair could be allocated, or the
 * core_poller_register() error code.
 */
static int __nvme_add_ioqpair__(va_list ap)
{
        int ret = 0;
        int disk_id = va_arg(ap, int);
        struct nvme_managemnt *nt = va_arg(ap, struct nvme_managemnt *);
        va_end(ap);

        g_qpairs[disk_id].qpair = nvme_ioqp_get(nt->ctrlr, 0, 0);
        if (unlikely(!g_qpairs[disk_id].qpair)) {
                DERROR("Allocate I/O qpair failed\n");
                return ENODEV;
        }

        list_add_tail(&g_qpairs[disk_id].hook, &g_qpairs_list);

        DINFO("nvme add qpair: %p\r\n", g_qpairs[disk_id].qpair);

        /* one-time poller registration for this thread */
        if(!poller_registerd) {
                poller_registerd = 1;
                ret = core_poller_register(core_self(), "native_nvme", nvme_qpairs_poll, NULL);
        }

        return ret;
}

/* Dispatch qpair creation onto the core identified by core_hash. */
static int nvme_add_ioqpair(int core_hash, int disk_id, struct nvme_managemnt *nt)
{
        int ret = core_request(core_hash, -1, "nvme_add_ioqpair",
                               __nvme_add_ioqpair__, disk_id, nt);

        if (unlikely(ret))
                GOTO(err_ret, ret);

        return 0;
err_ret:
        return ret;
}

/*
 * core_request() trampoline: runs on the target core, releases the disk's
 * qpair and unlinks it from this thread's polling list.
 *
 * NOTE(review): the poller is unregistered as soon as the first disk is
 * popped, even if other disks' qpairs are still on g_qpairs_list — confirm
 * this is only ever called when all disks on this core are being torn down.
 */
static int __nvme_pop_ioqpair__(va_list ap)
{
        int disk_id = va_arg(ap, int);
        va_end(ap);

        if(poller_registerd) {
                poller_registerd = 0;
                core_poller_unregister(core_self(), nvme_qpairs_poll);
        }

        /* already popped (or never added on this core): nothing to do */
        if(!g_qpairs[disk_id].qpair)
                return 0;

        DINFO("nvme remove qpair: %p\r\n", g_qpairs[disk_id].qpair);
        nvme_ioqp_release(g_qpairs[disk_id].qpair);

        g_qpairs[disk_id].qpair = NULL;
        list_del(&g_qpairs[disk_id].hook);

        return 0;
}

/* Dispatch qpair teardown onto the core identified by core_hash. */
static int nvme_pop_ioqpair(int core_hash, int disk_id)
{
        int ret;

        /* BUG FIX: the request tag said "nvme_add_ioqpair" (copy-paste from
         * the add path), which mislabels this operation in traces/logs. */
        ret = core_request(core_hash, -1, "nvme_pop_ioqpair", __nvme_pop_ioqpair__, disk_id);
        if (unlikely(ret))
                GOTO(err_ret, ret);

        return 0;
err_ret:
        return ret;
}

/*
 * NVMe completion callback. Publishes the result into the submitter's
 * nvme_complete_status and, on the aio path, resumes the waiting task.
 */
static void nvme_qpair_complete(void *arg, const struct nvme_cpl *cpl)
{
        int error;
        struct nvme_complete_status *status = arg;

        error = nvme_cpl_is_error(cpl);
        if (unlikely(error)) {
                DERROR("%s\n", get_status_string(cpl->status.sct, cpl->status.sc));
                YASSERT(!error);
        }

        /* BUG FIX: publish the completion state BEFORE waking the waiter.
         * 'status' lives on the submitting task's stack; once that task is
         * resumed it may read the fields (or return, invalidating them), so
         * writing them after schedule_resume() races with the waiter. */
        status->error = error;
        status->status = NVME_IO_COMPLETED;

        if (likely(status->aio_flag)) {
                schedule_resume(&status->task, error, NULL);
        }
}

/* Busy-poll the qpair until the synchronous command completes. */
static void nvme_qpair_wait(struct nvme_qpair *qpair, struct nvme_complete_status *status)
{
        do {
                nvme_ioqp_poll(qpair, 64);
        } while (status->status != NVME_IO_COMPLETED);
}

/*
 * Synchronous sector-aligned read (busy-polled).
 * Returns the number of bytes read on success, a positive errno-style
 * code on failure.
 */
static int nvme_disk_io_pread(const disk_t *disk, char *buf, size_t size, off_t offset)
{
        int ret;
        struct nvme_managemnt *nt = disk->disk_fd;
        struct nvme_complete_status status = {0};
        uint32_t nr_sectors;
        uint64_t lba;

        /* the device only accepts whole-sector transfers */
        YASSERT(size % nt->ns->sector_size == 0);
        YASSERT(offset % nt->ns->sector_size == 0);

        nr_sectors = size / nt->ns->sector_size;
        lba = offset / nt->ns->sector_size;

        ret = nvme_ns_read(nt->ns, nt->qpair, buf, lba, nr_sectors,
                           nvme_qpair_complete, &status, 0);
        if (unlikely(ret)) {
                ret = -ret;
                GOTO(err_ret, ret);
        }

        nvme_qpair_wait(nt->qpair, &status);
        if (unlikely(status.error)) {
                ret = EIO;
                GOTO(err_ret, ret);
        }

        return size;
err_ret:
        return ret;
}

/*
 * Synchronous sector-aligned write (busy-polled).
 * Returns the number of bytes written on success, a positive errno-style
 * code on failure.
 */
static int nvme_disk_io_pwrite(const disk_t *disk, char *buf, size_t size, off_t offset)
{
        int ret;
        struct nvme_managemnt *nt = disk->disk_fd;
        struct nvme_complete_status status = {0};
        uint32_t nr_sectors;
        uint64_t lba;

        /* the device only accepts whole-sector transfers */
        YASSERT(size % nt->ns->sector_size == 0);
        YASSERT(offset % nt->ns->sector_size == 0);

        nr_sectors = size / nt->ns->sector_size;
        lba = offset / nt->ns->sector_size;

        ret = nvme_ns_write(nt->ns, nt->qpair, buf, lba, nr_sectors,
                            nvme_qpair_complete, &status, 0);
        if (unlikely(ret)) {
                ret = -ret;
                GOTO(err_ret, ret);
        }

        nvme_qpair_wait(nt->qpair, &status);
        if (unlikely(status.error)) {
                ret = EIO;
                GOTO(err_ret, ret);
        }

        return size;
err_ret:
        return ret;
}

/*
 * Async read into buf: submits one NVMe read and suspends the current
 * task until nvme_qpair_complete() resumes it.
 *
 * If buf has more than one segment, a contiguous bounce buffer is
 * allocated and attached to buf on success (mbuffer_attach takes
 * ownership). On failure, the bounce buffer is freed here.
 *
 * NOTE(review): if nvme_ns_read() succeeded but schedule_yield() fails,
 * the bounce buffer is freed while the device may still be DMA-ing into
 * it — confirm schedule_yield() only fails before/after completion.
 */
static int IO_FUNC __nvme_disk_aio_readv(const disk_t *disk, const chkid_t *chkid, buffer_t *buf, off_t offset, int prio)
{
        int ret;
        uint64_t ofst;
        uint32_t sectors;
        size_t size = buf->len;
        void *buffer = NULL;
        struct nvme_qpair *qpair = NULL;
        struct nvme_complete_status status = {0};
        struct nvme_managemnt *nt = disk->disk_fd;

        /* aio path: completion resumes this task instead of busy-polling */
        status.aio_flag = 1;
        status.task = schedule_task_get();

        qpair = nvme_get_ioqpair(disk->idx);
        if (unlikely(!qpair)) {
                ret = ENODEV;
                GOTO(err_ret, ret);
        }

        if(unlikely(seg_count(buf) > 1)) { /*todo. possibly come from snapshot.*/
                ret = ymalloc(&buffer, size);
                if(unlikely(ret))
                        GOTO(err_ret, ret);
        }

        YASSERT(size % nt->ns->sector_size == 0);
        YASSERT(offset % nt->ns->sector_size == 0);

        sectors = size / nt->ns->sector_size;
        ofst = offset / nt->ns->sector_size;

        /* single-segment buffers are read in place; otherwise use the bounce buffer */
        ret = nvme_ns_read(nt->ns, qpair, buffer?buffer:((seg_t *)buf->list.next)->handler.ptr, ofst, sectors, nvme_qpair_complete, &status, 0);

        if (unlikely(ret)) {
                ret = -ret;
                GOTO(err_ret, ret);
        }

        ret = schedule_yield("nvme_disk_aio_readv", NULL, &status);
        if (unlikely(ret)) {
                ret = EIO;
                GOTO(err_ret, ret);
        }

        if(unlikely(buffer)) {
                /* hand the bounce buffer over to buf; buf now owns it */
                mbuffer_attach(buf, buffer, size, buffer);

                return 0;
        }

        return 0;
err_ret:
        if(unlikely(buffer))
                yfree(&buffer);

        return ret;
}

#if ENABLE_ALIGN_NEW
/*
 * Alignment-aware async read. When the chunk type (or read-modify-write
 * config) requires alignment and the request is unaligned, read a larger
 * aligned window into a temporary buffer and pop the requested span back
 * into the caller's buffer.
 */
static int IO_FUNC nvme_disk_aio_readv(const disk_t *disk, const chkid_t *chkid,
                                         buffer_t *buf,off_t real_offset, int prio)
{
        int ret, trans = 0;
        seg_t *seg;
        uint64_t newoff;
        buffer_t *newbuf, __tmp;
        char tmp[PAGE_SIZE];

        YASSERT(chkid);
        int align = 0;
        align = (chkid->type != __RAW_CHUNK__
                 || (chkid->type == __RAW_CHUNK__
                     && gloconf.read_modify_write));

        if (unlikely(align && !is_aligned(buf, real_offset))) {
                trans = 1;

                newbuf = &__tmp;
                ret = buffer_align_trans(buf, real_offset, newbuf, &newoff);
                if (unlikely(ret))
                        GOTO(err_ret, ret);

                /* BUG FIX: the format string and arguments were mismatched —
                 * %ju consumed the 32-bit newbuf->len and %u consumed the
                 * 64-bit newoff, both undefined behavior. Realigned the
                 * conversions with the arguments and added explicit casts. */
#if ENABLE_CHUNK_DEBUG
                DINFO("aio align read "CHKID_FORMAT" disk %u "
                                "raw (size %u chunk offset %jd) "
                                "align (size %u disk offset %ju)\n",
                                CHKID_ARG(chkid), disk->idx,
                                buf->len, (intmax_t)real_offset,
                                newbuf->len, (uintmax_t)newoff
                    );
#else
                DBUG("aio align read "CHKID_FORMAT" disk %u "
                                "raw (size %u chunk offset %jd) "
                                "align (size %u disk offset %ju)\n",
                                CHKID_ARG(chkid), disk->idx,
                                buf->len, (intmax_t)real_offset,
                                newbuf->len, (uintmax_t)newoff
                    );

#endif
        } else {
                newbuf = buf;
                newoff = real_offset;
        }

        YASSERT(real_offset >= newoff);

        ret = __nvme_disk_aio_readv(disk, chkid, newbuf, newoff, prio);
        if (unlikely(ret)) {
                DERROR("disk[%u] ret %d %s\n", disk->idx, ret, strerror(ret));
                GOTO(err_free, ret);
        }

        if (unlikely(trans)) {
                /* discard the leading pad, then copy the requested span back */
                YASSERT(real_offset - newoff < PAGE_SIZE);
                ret = mbuffer_popmsg(newbuf, tmp, real_offset - newoff);
                if (unlikely(ret))
                        YASSERT(0 && "why?");

                seg = (seg_t *)buf->list.next;
                YASSERT(buf->len == seg->len);
                ret = mbuffer_popmsg(newbuf, seg->handler.ptr, buf->len);
                if (unlikely(ret))
                        YASSERT(0 && "why?");

                DBUG("mbuffer_free len %u\n", newbuf->len);
                mbuffer_free(newbuf);
        }

        return 0;
err_free:
        mbuffer_free(newbuf);
err_ret:
        return ret;
}

#else
/* Alignment handling disabled at build time: forward to the raw aio read. */
static int IO_FUNC nvme_disk_aio_readv(const disk_t *disk, const chkid_t *chkid,
                                         buffer_t *buf,off_t real_offset, int prio)
{
        return __nvme_disk_aio_readv(disk, chkid, buf, real_offset, prio);
}
#endif

/*
 * Async write from buf: submits one NVMe write and suspends the current
 * task until nvme_qpair_complete() resumes it.
 *
 * Multi-segment buffers are first flattened into a temporary bounce
 * buffer. Note the success path deliberately falls through the err_ret
 * label (with ret == 0) so the bounce buffer is freed on every path.
 *
 * NOTE(review): 'prio' is accepted but unused here. Also, if the submit
 * succeeded but schedule_yield() fails, the bounce buffer is freed while
 * the device may still be reading from it — confirm schedule_yield()
 * only fails before/after completion.
 */
static int IO_FUNC __nvme_disk_aio_writev(const disk_t *disk, const chkid_t *chkid, const buffer_t *buf, off_t offset, int prio)
{
        int ret;
        uint64_t ofst;
        uint32_t sectors;
        void *buffer = NULL;
        size_t size = buf->len;
        struct nvme_qpair *qpair = NULL;
        struct nvme_complete_status status = {0};
        struct nvme_managemnt *nt = disk->disk_fd;

        /* aio path: completion resumes this task instead of busy-polling */
        status.aio_flag = 1;
        status.task = schedule_task_get();

        ANALYSIS_BEGIN(0);

        qpair = nvme_get_ioqpair(disk->idx);
        if (unlikely(!qpair)) {
                ret = ENODEV;
                GOTO(err_ret, ret);
        }

        if(unlikely(seg_count(buf) > 1)) { /*todo. possibly come from snapshot.*/
                ret = ymalloc(&buffer, size);
                if(unlikely(ret))
                        GOTO(err_ret, ret);

                /* flatten the segment chain into the bounce buffer */
                mbuffer_get(buf, buffer, size);
        }

        YASSERT(size % nt->ns->sector_size == 0);
        YASSERT(offset % nt->ns->sector_size == 0);

        sectors = size / nt->ns->sector_size;
        ofst = offset / nt->ns->sector_size;

        /* single-segment buffers are written in place; otherwise use the bounce buffer */
        ret = nvme_ns_write(nt->ns, qpair, buffer?buffer:((seg_t *)buf->list.next)->handler.ptr, ofst, sectors, nvme_qpair_complete, &status, 0);
        if (unlikely(ret)) {
                ret = -ret;
                GOTO(err_ret, ret);
        }

        ret = schedule_yield("nvme_disk_aio_writev", NULL, &status);
        if (unlikely(ret)) {
                ret = EIO;
                GOTO(err_ret, ret);
        }

err_ret:
        if(unlikely(buffer))
                yfree(&buffer);

        ANALYSIS_QUEUE(0, IO_WARN, "nvme_disk_aio_writev");

        return ret;
}

#if ENABLE_ALIGN_NEW

/*
 * Alignment-aware async write. For unaligned requests, performs a
 * read-modify-write: read the aligned window, splice the caller's data
 * over it, then write the whole window back.
 */
static int IO_FUNC nvme_disk_aio_writev(const disk_t *disk, const chkid_t *chkid,
                                        const buffer_t *buf, off_t real_offset, int prio)
{
        int ret, trans = 0;
        uint64_t newoff;
        seg_t *seg;
        buffer_t *newbuf = NULL, __tmp;

        YASSERT(chkid);

        int align = 0;
        align = (chkid->type != __RAW_CHUNK__
                 || (chkid->type == __RAW_CHUNK__
                     && gloconf.read_modify_write));

        if (unlikely(align && !is_aligned(buf, real_offset))) {
                trans = 1;

                newbuf = &__tmp;
                ret = buffer_align_trans(buf, real_offset, newbuf, &newoff);
                if (unlikely(ret))
                        GOTO(err_ret, ret);

                /* BUG FIX: (1) the message said "read" in the write path
                 * (copy-paste); (2) the format string and arguments were
                 * mismatched — %ju consumed the 32-bit newbuf->len and %u
                 * consumed the 64-bit newoff, both undefined behavior. */
#if ENABLE_CHUNK_DEBUG
                DINFO("aio align write "CHKID_FORMAT" disk %u "
                                "raw (size %u chunk offset %jd) "
                                "align (size %u disk offset %ju)\n",
                                CHKID_ARG(chkid), disk->idx,
                                buf->len, (intmax_t)real_offset,
                                newbuf->len, (uintmax_t)newoff
                    );
#else
                DBUG("aio align write "CHKID_FORMAT" disk %u "
                                "raw (size %u chunk offset %jd) "
                                "align (size %u disk offset %ju)\n",
                                CHKID_ARG(chkid), disk->idx,
                                buf->len, (intmax_t)real_offset,
                                newbuf->len, (uintmax_t)newoff
                    );

#endif
                /* read-modify-write: fetch the aligned window first */
                ret = __nvme_disk_aio_readv(disk, chkid, newbuf, newoff, 1);
                if (unlikely(ret))
                        GOTO(err_free, ret);

                /* splice the caller's payload over the window */
                seg = (seg_t *)newbuf->list.next;
                YASSERT(buf->len <= (seg->len - (real_offset - newoff)));
                mbuffer_get(buf, seg->handler.ptr + (real_offset - newoff), buf->len);
        } else {
                newbuf = (buffer_t *)buf;
                newoff = real_offset;
        }

        YASSERT(real_offset >= newoff);

        ret = __nvme_disk_aio_writev(disk, chkid, newbuf, newoff, prio);
        if (unlikely(ret)) {
                DERROR("disk[%u] ret %d %s\n", disk->idx, ret, strerror(ret));
                GOTO(err_free, ret);
        }

        if (unlikely(trans)) {
                mbuffer_free(newbuf);
        }

        return 0;
err_free:
        if (unlikely(trans)) {
                mbuffer_free(newbuf);
        }
err_ret:
        return ret;
}

#else
/* Alignment handling disabled at build time: forward to the raw aio write.
 * BUG FIX: forward the caller's prio instead of hard-coding 1, matching the
 * read-side wrapper. Behavior-neutral today since __nvme_disk_aio_writev()
 * does not use prio, but keeps the wrappers consistent. */
static int IO_FUNC nvme_disk_aio_writev(const disk_t *disk, const chkid_t *chkid,
                                        const buffer_t *buf, off_t real_offset, int prio)
{
        return __nvme_disk_aio_writev(disk, chkid, buf, real_offset, prio);
}
#endif

/*
 * path format: pci_p1_disk0_0000.82.00.04
 */
/*
 * Parse a disk path of the form "pci_p1_disk0_0000.82.00.04" into the
 * pool name ("p1") and a pci_slot_match (domain/bus/dev/func in hex).
 * Note: _str_split() mutates 'path' in place.
 */
static int path2pci(char *path, char *pool, struct pci_slot_match *slot)
{
        int ret, nr;
        char *bus_str;
        char *fields[4];

        DINFO("initior nvme device %s\n", path);

        if (unlikely(strstr(path, "pci") == NULL)) {
                ret = ENODEV;
                GOTO(err_ret, ret);
        }

        /* split "pci_<pool>_<disk>_<bus>" on '_' */
        nr = 4;
        _str_split(path, '_', fields, &nr);
        if (nr != 4) {
                DINFO("disk spdk arg fail %d\n", nr);
                ret = EINVAL;
                GOTO(err_ret, ret);
        }

        strcpy(pool, fields[1]);
        // strcpy(disk, fields[2]);
        bus_str = fields[3];

        /* split "domain.bus.dev.func" on '.' */
        nr = 4;
        _str_split(bus_str, '.', fields, &nr);
        if (nr != 4) {
                DINFO("disk spdk arg fail %d\n", nr);
                ret = EINVAL;
                GOTO(err_ret, ret);
        }

        slot->domain = strtol(fields[0], NULL, 16);
        slot->bus = strtol(fields[1], NULL, 16);
        slot->dev = strtol(fields[2], NULL, 16);
        slot->func = strtol(fields[3], NULL, 16);

        return 0;
err_ret:
        return ret;
}

/*
 * Parse a disk path of the form "pci_p1_disk0_0000.82.00.04" into the
 * pool name ("p1") and a libnvme slot URL ("pci://0000:82:00.04").
 * Note: _str_split() mutates 'path' in place.
 */
static int path2pciStr(char *path, char *pool, char *slot)
{
        int ret, nr;
        char *bus_str;
        char *fields[4];

        DINFO("initior nvme device %s\n", path);

        if (unlikely(strstr(path, "pci") == NULL)) {
                ret = ENODEV;
                GOTO(err_ret, ret);
        }

        /* split "pci_<pool>_<disk>_<bus>" on '_' */
        nr = 4;
        _str_split(path, '_', fields, &nr);
        if (nr != 4) {
                DINFO("disk spdk arg fail %d\n", nr);
                ret = EINVAL;
                GOTO(err_ret, ret);
        }

        strcpy(pool, fields[1]);
        // strcpy(disk, fields[2]);
        bus_str = fields[3];

        /* split "domain.bus.dev.func" on '.' */
        nr = 4;
        _str_split(bus_str, '.', fields, &nr);
        if (nr != 4) {
                DINFO("disk spdk arg fail %d\n", nr);
                ret = EINVAL;
                GOTO(err_ret, ret);
        }

        sprintf(slot, "pci://%s:%s:%s.%s", fields[0], fields[1], fields[2], fields[3]);

        return 0;
err_ret:
        return ret;
}

/*
 * Open the NVMe device behind "<home>/disk/<idx>.disk": attach the
 * controller, open namespace 1, allocate the management qpair, and bind
 * per-core I/O qpairs. On success, disk->disk_fd owns the
 * nvme_managemnt handle and *disk_size is set in bytes.
 */
static int load_nvme_disk(disk_t *disk, const char *home, char *pool, uint64_t *disk_size)
{
        int ret, i, ns_id = 1,itry = 3;
        struct stat stbuf;
        struct nvme_ns *ns = NULL;
        struct nvme_ctrlr_opts opts;
        struct nvme_ns_stat nsstat;
        struct nvme_ctrlr_stat cstat;
        struct nvme_qpair *qpair = NULL;
        struct nvme_ctrlr *ctrlr = NULL;
        struct nvme_managemnt *nt = NULL;
        char path[PATH_MAX], slot[PATH_MAX];

        YASSERT(disk->idx < DISK_MAX);

        ret = ymalloc((void **)&nt, sizeof(struct nvme_managemnt));
        if (unlikely(ret))
                GOTO(err_ret, ret);

        sprintf(path, "%s/disk/%d.disk", home, disk->idx);

        ret = diskmd_real_path(path, &stbuf);
        if (unlikely(ret))
                GOTO(err_free, ret);

        ret = path2pciStr(path, pool, slot);
        if (unlikely(ret))
                GOTO(err_free, ret);

        memset(&opts, 0, sizeof(struct nvme_ctrlr_opts));
        opts.io_queues = 128;

        /* the controller may need a moment after hot-plug; retry a few times */
        while(itry--) {
                ctrlr = nvme_ctrlr_open(slot, &opts);
                if (unlikely(!ctrlr)) {
                        DWARN("open pci device failed, path: %s\n", slot);
                } else
                        break;
        }
        if (unlikely(!ctrlr)) {
                DERROR("open pci device failed\n");
                ret = ENODEV;
                GOTO(err_free, ret);
        }

        if (nvme_ctrlr_stat(ctrlr, &cstat) != 0) {
                DERROR("Get NVMe controller %s info failed\n", path);
                ret = ENODEV;
                /* BUG FIX: error paths below used to jump straight to
                 * err_free, leaking the opened controller/namespace. */
                GOTO(err_ctrlr, ret);
        }

        if (cstat.io_qpairs != opts.io_queues)
                DINFO("Number of IO qpairs limited to %u\n", cstat.io_qpairs);

        DINFO("Attached NVMe controller %s (%s) (%u namespace%s)\n", cstat.mn, cstat.sn, cstat.nr_ns, (cstat.nr_ns > 1) ? "s" : "");

        /* Open the name space */
        ns = nvme_ns_open(ctrlr, ns_id);
        if (!ns) {
                DERROR("Open NVMe controller %s name space %u failed\n", slot, ns_id);
                ret = ENODEV;
                GOTO(err_ctrlr, ret);
        }

        if (nvme_ns_stat(ns, &nsstat) != 0) {
                DERROR("Get name space %u info failed\n", ns_id);
                ret = ENODEV;
                GOTO(err_ns, ret);
        }

        qpair = nvme_ioqp_get(ctrlr, 0, 0);
        if (unlikely(!qpair)) {
                DERROR("Allocate I/O qpair failed\n");
                ret = ENODEV;
                GOTO(err_ns, ret);
        }

        nt->ctrlr = ctrlr;
        nt->ns = ns;
        nt->qpair = qpair;

        /* bind a per-core qpair on every usable core.
         * NOTE(review): failures here are ignored; a core without a qpair
         * will return ENODEV from its aio path — confirm acceptable. */
        for (i = 0; i < cpuset_useable(); i++) {
                nvme_add_ioqpair(i, disk->idx, nt);
        }

        disk->disk_fd = (void *)nt;
        disk->disk_type = __DISK_TYPE_NVME_DISK__;

        *disk_size = nsstat.sector_size * nsstat.sectors;

        /* BUG FIX: cast the %ju arguments to uintmax_t; passing narrower
         * types to %ju is undefined behavior. */
        DINFO("load disk[%u] tier %d size %ju, sector_size %ju\n", disk->idx, disk->tier,
              (uintmax_t)*disk_size, (uintmax_t)nsstat.sector_size);

        return 0;
err_ns:
        nvme_ns_close(ns);
err_ctrlr:
        nvme_ctrlr_close(ctrlr);
err_free:
        yfree((void **)&nt);
err_ret:
        return ret;
}

/* Tear down a loaded NVMe disk: pop the per-core qpairs, then release the
 * management qpair, namespace and controller, and free the handle. */
static void nvme_disk_unload(disk_t *disk)
{
        struct nvme_managemnt *nt = disk->disk_fd;
        int core;

        DINFO("unload nvme disk.\r\n");
        if (!nt)
                DWARN("double free!!\r\n");

        /* drop the per-core qpairs first so no core submits new I/O */
        for (core = 0; core < cpuset_useable(); core++)
                nvme_pop_ioqpair(core, disk->idx);

        if (!nt)
                return;

        if (nt->qpair)
                nvme_ioqp_release(nt->qpair);

        if (nt->ns)
                nvme_ns_close(nt->ns);

        if (nt->ctrlr)
                nvme_ctrlr_close(nt->ctrlr);

        yfree((void **)&nt);
        disk->disk_fd = NULL;
}

/* Share the already-loaded disk handle with a per-core disk_t clone. */
static int disk_nvme_connect(const disk_t *disk, disk_t *newdisk)
{
        core_t *core = core_self();

        DINFO("open disk[%d] core[%u]\n", disk->idx, core->hash);

        /* shallow copy: both disk_t instances reference the same disk_fd */
        newdisk->idx = disk->idx;
        newdisk->dop = disk->dop;
        newdisk->disk_fd = disk->disk_fd;
        newdisk->disk_base_offset = disk->disk_base_offset;

        return 0;
}

/* Per-core disconnect: nothing to release (connect was a shallow copy);
 * just log the event. */
static void disk_nvme_disconnect(disk_t *disk)
{
        core_t *self = core_self();

        DINFO("close disk[%d] core[%u]\n", disk->idx, self->hash);
}

/* NVMe backend operation table; exposed via get_nvme_disk_ops(). */
static struct disk_op_t __nvme_disk_dop__ = {
        .open = load_nvme_disk,
        .close = nvme_disk_unload,

        /* synchronous sector I/O (busy-polled) */
        .io_pread = nvme_disk_io_pread,
        .io_pwrite = nvme_disk_io_pwrite,
        /* asynchronous I/O (task suspended until completion) */
        .aio_readv = nvme_disk_aio_readv,
        .aio_writev = nvme_disk_aio_writev,

        /* generic helpers shared with other disk backends (defined elsewhere) */
        .destroy = destroy,
        .offline = offline,
        .writeable = writeable,
        .create_new = initnew,
        .probe_check = normal_disk_probe_check,

        .connect = disk_nvme_connect,
        .disconnect = disk_nvme_disconnect
};

/* Return the shared, immutable NVMe disk operation table. */
struct disk_op_t *get_nvme_disk_ops(void) 
{
        return &__nvme_disk_dop__; 
}
